

#' Plot sample coefficient of variation whitin group
#'
#' @param se a SummarizedExperiment data object
#'
#' @return a histogram plot of row coefficient of variation (standard deviation / mean) in each condition
#' @export
#'
#' @examples
#' data(Silicosis_pg)
#' data <- Silicosis_pg
#' data_unique <- make_unique(data, "Gene.names", "Protein.IDs", delim = ";")
#'
#' # Differential test
#' ecols <- grep("LFQ.", colnames(data_unique))
#' se <- make_se_parse(data_unique, ecols,mode = "delim")
#' filt <- filter_se(se, thr = 0, fraction = 0.4, filter_formula = ~ Reverse != "+" & Potential.contaminant!="+")
#' norm <- normalize_vsn(filt)
#' imputed <- impute(norm, fun = "MinProb", q = 0.05)
#' plot_cvs(imputed)
#'
plot_cvs<-function(se) {
  ## backtransform data
  untransformed_intensity<- 2^(assay(se)) %>% as.data.frame()
  exp_design<-colData(se)

  ## function to calulate coefficient of Variation
  coef_variation<-function(x){
    coef=sd(x)/mean(x)
  }

  ### merge untransformed to exp design and calculate cvs
  cvs_group<- untransformed_intensity %>%
    tibble::rownames_to_column() %>%
    tidyr::gather("ID", "Intensity", -rowname) %>%
    dplyr::left_join(.,data.frame(exp_design), by="ID") %>%
    dplyr::group_by(rowname,condition) %>%
    dplyr::summarise(cvs=coef_variation(Intensity)) %>%
    dplyr::group_by(condition)%>%
    dplyr::mutate(condition_median=median(cvs)) %>% dplyr::ungroup()

  p1 <-  ggplot(cvs_group, aes(cvs, color=condition, fill=condition)) +
    geom_histogram(alpha=.5, bins= 20, show.legend = FALSE) +
    facet_wrap(~condition) +
    geom_vline(aes(xintercept=condition_median, group=condition),color='grey40',
               linetype="dashed") +
    labs(title= 'Sample Coefficient of Variation', x="Coefficient of Variation", y="Count") +
    theme_DEP2() +
    theme(plot.title = element_text(hjust = 0.5,face = "bold"))

  p2 <- p1 +geom_text(aes(x = max(cvs)-0.6,
                    y = max(ggplot_build(p1)$data[[1]]$ymax*1.1),
                    label = paste0("Median =",round(condition_median,3)*100,"%",by="")),
                show.legend = FALSE, size=4)
  return(p2)

}



## following functions stay the same to DEP package

#' Plot row standard deviations versus row means
#'
#' \code{meanSdPlot} generates a hexagonal heatmap
#' of the row standard deviations versus row means
#' from SummarizedExperiment objects.
#' See \code{\link[vsn]{meanSdPlot}}.
#'
#' @param x SummarizedExperiment,
#' Data object.
#' @param ranks Logical,
#' Whether or not to plot the row means on the rank scale.
#' @param xlab Character,
#' x-axis label.
#' @param ylab Character,
#' y-axis label.
#' @param pch Ignored -
#' exists for backward compatibility.
#' @param plot Logical,
#' Whether or not to produce the plot.
#' @param bins Numeric vector,
#' Data object before normalization.
#' @param ... Other arguments,
#' Passed to \code{\link[ggplot2:geom_hex]{stat_binhex}}.
#' @return A scatter plot of row standard deviations
#' versus row means(generated by \code{\link[ggplot2:geom_hex]{stat_binhex}})
#' @examples
#' # Load example
#' data(Silicosis_pg)
#' data <- Silicosis_pg
#' data_unique <- make_unique(data, "Gene.names", "Protein.IDs", delim = ";")
#'
#' # Differential test
#' ecols <- grep("LFQ.", colnames(data_unique))
#' se <- make_se_parse(data_unique, ecols,mode = "delim")
#' filt <- filter_se(se, thr = 0, fraction = 0.4, filter_formula = ~ Reverse != "+" & Potential.contaminant!="+")
#' norm <- normalize_vsn(filt)
#'
#' # Plot meanSdPlot
#' meanSdPlot(norm)
#' @export
meanSdPlot <- function(x,
                       ranks = TRUE,
                       xlab = ifelse(ranks, "rank(mean)", "mean"),
                       ylab = "sd",
                       pch,
                       plot = TRUE,
                       bins = 50,
                       ...) {
  vsn::meanSdPlot(assay(x),
                  ranks = ranks,
                  xlab = xlab,
                  ylab = ylab,
                  pch = pch,
                  plot = plot,
                  ...)
}


#' Visualize normalization
#'
#' \code{plot_normalization} generates boxplots
#' of all conditions for input objects, e.g. before and after normalization.
#'
#' @param se SummarizedExperiment,
#' Data object, e.g. before normalization (output from \code{\link{make_se}()}
#' or \code{\link{make_se_parse}()}).
#' @param ... Additional SummarizedExperiment object(s),
#' E.g. data object after normalization
#' (output from \code{\link{normalize_vsn}}).
#' @return Boxplots of all conditions
#' for input objects, e.g. before and after normalization
#'   (generated by \code{\link[ggplot2]{ggplot}}).
#' Adding components and other plot adjustments can be easily done
#' using the ggplot2 syntax (i.e. using '+')
#' @examples
#' # Load example
#' data(Silicosis_pg)
#' data <- Silicosis_pg
#' data_unique <- make_unique(data, "Gene.names", "Protein.IDs", delim = ";")
#'
#' # Construct SE
#' ecols <- grep("LFQ.", colnames(data_unique))
#' se <- make_se_parse(data_unique, ecols,mode = "delim")
#'
#' # Filter and normalization
#' filt <- filter_se(se, thr = 0, fraction = 0.4, filter_formula = ~ Reverse != "+" & Potential.contaminant!="+")
#' norm <- normalize_vsn(filt)
#'
#' # Plot normalization
#' plot_normalization(se, filt, norm)
#' @export
plot_normalization <- function(se, ...) {
  # Get arguments from call
  call <- match.call()
  arglist <- lapply(call[-1], function(x) x)
  var.names <- vapply(arglist, deparse, character(1))
  arglist <- lapply(arglist, eval.parent, n = 2)
  names(arglist) <- var.names

  # Show error if inputs are not the required classes
  lapply(arglist, function(x) {
    assertthat::assert_that(inherits(x,
                                     "SummarizedExperiment"),
                            msg = "input objects need to be of class 'SummarizedExperiment'")
    if (any(!c("label", "ID", "condition", "replicate") %in% colnames(colData(x)))) {
      stop("'label', 'ID', 'condition' and/or 'replicate' ",
           "columns are not present in (one of) the input object(s)",
           "\nRun make_se() or make_se_parse() to obtain the required columns",
           call. = FALSE)
    }
  })

  # Function to get a long data.frame of the assay data
  # annotated with sample info
  gather_join <- function(se) {
    assay(se) %>%
      data.frame() %>%
      gather(ID, val) %>%
      left_join(., data.frame(colData(se)), by = "ID")
  }

  df <- map_df(arglist, gather_join, .id = "var") %>%
    mutate(var = factor(var, levels = names(arglist)))

  # Boxplots for conditions with facet_wrap
  # for the original and normalized values
  ggplot(df, aes(x = ID, y = val, fill = condition)) +
    geom_boxplot(notch = TRUE, na.rm = TRUE) +
    coord_flip() +
    facet_wrap(~var, ncol = 1) +
    labs(x = "", y = expression(log[2]~"Intensity")) +
    theme_DEP1()
}

#' Visualize imputation
#'
#' \code{plot_imputation} generates density plots
#' of all conditions for input objects, e.g. before and after imputation.
#'
#' @param se SummarizedExperiment,
#' Data object, e.g. before imputation
#' (output from \code{\link{normalize_vsn}()}).
#' @param ... Other SummarizedExperiment object(s),
#' E.g. data object after imputation
#' (output from \code{\link{impute}()}).
#' @return Density plots of all conditions
#' of all conditions for input objects, e.g. before and
#' after imputation (generated by \code{\link[ggplot2]{ggplot}}).
#' @examples
#' # Load example
#' data(Silicosis_pg)
#' data <- Silicosis_pg
#' data_unique <- make_unique(data, "Gene.names", "Protein.IDs", delim = ";")
#'
#' # Construct SE
#' ecols <- grep("LFQ.", colnames(data_unique))
#' se <- make_se_parse(data_unique, ecols,mode = "delim")
#'
#' # Filter and normalization
#' filt <- filter_se(se, thr = 0, fraction = 0.4, filter_formula = ~ Reverse != "+" & Potential.contaminant!="+")
#' norm <- normalize_vsn(filt)
#'
#' # imputation
#' imputed <- impute(norm, fun = "MinProb", q = 0.01)
#'
#' # Plot imputation
#' plot_imputation(filt, norm, imputed)
#' @export
plot_imputation <- function(se, ...) {
  # Get arguments from call
  call <- match.call()
  arglist <- lapply(call[-1], function(x) x)
  var.names <- vapply(arglist, deparse, character(1))
  arglist <- lapply(arglist, eval.parent, n = 2)
  names(arglist) <- var.names

  # Show error if inputs are not the required classes
  lapply(arglist, function(x) {
    assertthat::assert_that(inherits(x,
                                     "SummarizedExperiment"),
                            msg = "input objects need to be of class 'SummarizedExperiment'")
    if (any(!c("label", "ID", "condition", "replicate") %in% colnames(colData(x)))) {
      stop("'label', 'ID', 'condition' and/or 'replicate' ",
           "columns are not present in (one of) the input object(s)",
           "\nRun make_se() or make_se_parse() to obtain the required columns",
           call. = FALSE)
    }
  })

  # Function to get a long data.frame of the assay data
  # annotated with sample info
  gather_join <- function(se) {
    assay(se) %>%
      data.frame() %>%
      gather(ID, val) %>%
      left_join(., data.frame(colData(se)), by = "ID")
  }

  df <- map_df(arglist, gather_join, .id = "var") %>%
    mutate(var = factor(var, levels = names(arglist)))

  # Density plots for different conditions with facet_wrap
  # for original and imputed samles
  ggplot(df, aes(val, col = condition)) +
    geom_density(na.rm = TRUE) +
    facet_wrap(~var, ncol = 1) +
    labs(x = expression(log[2]~"Intensity"), y = "Density") +
    theme_DEP1()
}

#' Visualize intensities of proteins with missing values
#'
#' \code{plot_detect} generates density and CumSum plots
#' of protein intensities with and without missing values
#'
#' @param se SummarizedExperiment,
#' Data object with missing values.
#' @return Density and CumSum plots of intensities of
#' proteins with and without missing values
#' (generated by \code{\link[ggplot2]{ggplot}}).
#' @examples
#' # Load example
#' data(Silicosis_pg)
#' data <- Silicosis_pg
#' data_unique <- make_unique(data, "Gene.names", "Protein.IDs", delim = ";")
#'
#' # Construct SE
#' ecols <- grep("LFQ.", colnames(data_unique))
#' se <- make_se_parse(data_unique, ecols,mode = "delim")
#'
#' # Filter
#' filt <- filter_se(se, thr = 0, fraction = 0.4, filter_formula = ~ Reverse != "+" & Potential.contaminant!="+")
#'
#' # Plot intensities of proteins with missing values
#' plot_detect(filt)
#' @export
plot_detect <- function(se) {
  # Show error if inputs are not the required classes
  assertthat::assert_that(inherits(se, "SummarizedExperiment"))

  se_assay <- assay(se)
  # Show error if there are no missing values
  if(!any(is.na(se_assay))) {
    stop("No missing values in '", deparse(substitute(se)), "'",
         call. = FALSE)
  }

  # Get a long data.frame of the assay data annotated with sample info
  df <- se_assay %>%
    data.frame() %>%
    rownames_to_column() %>%
    gather(ID, val, -rowname)

  # Get a summarized table with mean protein intensities and
  # indication whether the protein has missing values
  stat <- df %>%
    group_by(rowname) %>%
    summarize(mean = mean(val, na.rm = TRUE), missval = any(is.na(val)))

  # Calculate cumulative fraction
  cumsum <- stat %>%
    group_by(missval) %>%
    arrange(mean) %>%
    mutate(num = 1, cs = cumsum(num), cs_frac = cs/n())

  # Plot the densities and cumalitive fractions for
  # proteins with and without missing values
  p1 <- ggplot(stat, aes(mean, col = missval)) +
    geom_density(na.rm = TRUE) +
    labs(x = expression(log[2]~"Intensity"), y = "Density") +
    guides(col = guide_legend(title = "Missing values")) +
    theme_DEP1()
  p2 <- ggplot(cumsum, aes(mean, cs_frac, col = missval)) +
    geom_line() +
    labs(x = expression(log[2]~"Intensity"), y = "Cumulative fraction") +
    guides(col = guide_legend(title = "Missing values")) +
    theme_DEP1()
  gridExtra::grid.arrange(p1, p2, ncol = 1)
}

#' Plot a heatmap of proteins with missing values
#'
#' \code{plot_missval} generates a heatmap of proteins
#' with missing values to discover whether values are missing by random or not.
#'
#' @param se SummarizedExperiment
#' @param ... Additional arguments for Heatmap function as depicted in
#' \code{\link[ComplexHeatmap]{Heatmap}}
#' Data object with missing values.
#' @return A heatmap indicating whether values are missing (0) or not (1)
#' (generated by \code{\link[ComplexHeatmap]{Heatmap}}).
#' @examples
#' # Load example
#' data(Silicosis_pg)
#' data <- Silicosis_pg
#' data_unique <- make_unique(data, "Gene.names", "Protein.IDs", delim = ";")
#'
#' # Differential test
#' ecols <- grep("LFQ.", colnames(data_unique))
#' se <- make_se_parse(data_unique, ecols,mode = "delim")
#' filt <- filter_se(se, thr = 0, fraction = 0.4, filter_formula = ~ Reverse != "+" & Potential.contaminant!="+")
#'
#' # Plot missing values heatmap
#' plot_missval(filt)
#'
#' @export
plot_missval <- function (se, ...)
{
  assertthat::assert_that(inherits(se, "SummarizedExperiment"))
  se_assay <- assay(se)
  if (!any(is.na(se_assay))) {
    stop("No missing values in '", deparse(substitute(se)),
         "'", call. = FALSE)
  }
  df <- se_assay %>% data.frame(.)
  missval <- df[apply(df, 1, function(x) any(is.na(x))), ]
  missval <- ifelse(is.na(missval), 0, 1)
  ht2 = ComplexHeatmap::Heatmap(missval, col = c("white", "black"),
                                column_names_side = "top", show_row_names = FALSE,
                                show_column_names = TRUE, name = "Missing values pattern",
                                column_names_gp = gpar(fontsize = 16), heatmap_legend_param = list(at = c(0,
                                                                                                          1), labels = c("Missing value", "Valid value")), ...)
  draw(ht2, heatmap_legend_side = "top")
}
